clear all
use ../processed/NLSY79.dta, replace

gen AFQT3_a = max(AFQT3-1/3,0)
gen AFQT3_b = max(AFQT3-2/3,0)
gen female = (sex==2)
gen age_a = max(age-35,0)
gen age_b = max(age-45,0)
local ctrl AFQT3 AFQT3_a AFQT3_b female blk hsp locE locU urban age age_a age_b nsibs

// parental education
forvalues j=1/5{
	gen hgc_m`j' = (hgc_m==`j')
	gen hgc_f`j' = (hgc_f==`j')
	local ctrl `ctrl' hgc_m`j' hgc_f`j'
}
// cohort
forvalues j=1958/1964{
	gen yob`j' = (yob==`j')
	local ctrl `ctrl' yob`j'
}
// Census division
forvalues j=2/9{
	gen divisiona`j' = (divisiona==`j')
	local ctrl `ctrl' divisiona`j'
}

local ctrlx
foreach v in `ctrl'{
	gen x_`v' = hgc_r*`v'
	local ctrlx `ctrlx' x_`v'
}

local xlist
forvalues j=9/18{
	gen x`j' = (hgc_r>=`j')
	local xlist `xlist' x`j'
}

gen xc = max(hgc_r-12,0)
local ctrlxc
foreach v in `ctrl'{
	gen xc_`v' = xc*`v'
	local ctrlxc `ctrlxc' xc_`v'
}

regress logwage `ctrl' [pw=wgt]
predict y_res, resid
regress hgc_r `ctrl' [pw=wgt]
predict x_res, resid
qui foreach v in xc `xlist'{
	regress `v' `ctrl' [pw=wgt]
	predict `v'_res, resid
}

// FS
local iv pub4 tui4 locE17 locU17
regress hgc_r `iv' `ctrl' [pw=wgt]
predict z, xb
regress z `ctrl' [pw=wgt]
predict z_res, resid

// predict E[Y|X,W]
regress logwage hgc_r `ctrlx' `ctrl' [pw=wgt]
predict vhat1, resid
gen mhat1 = _b[hgc_r]*x_res
foreach v in `ctrl'{
	replace mhat1 = mhat1 + `v'*_b[x_`v']*x_res
}
regress z_res hgc_r `ctrlx' `ctrl' [pw=wgt]
predict zhat1, xb

regress logwage `xlist'  `ctrlxc' `ctrlx' `ctrl' [pw=wgt]
predict vhat2, resid
gen mhat2 = 0
foreach v in `ctrl'{
	replace mhat2 = mhat2 + `v'*( _b[x_`v']*x_res+_b[xc_`v']*xc_res ) 
}
foreach v in `xlist'{
	replace mhat2 = mhat2 + _b[`v']*`v'_res
}
regress z_res `xlist'  `ctrlxc' `ctrlx' `ctrl' [pw=wgt]
predict zhat2, xb

matrix A = J(6,2,.)
matrix colnames A = "Coefficient" "StdError"

corr x_res z_res [aw=wgt], covar
scalar m_xx = r(Var_1)
scalar m_xz = r(cov_12)

// OLS
reg logwage hgc_r `ctrl' [pw=wgt], vce(cluster cty_grp)
matrix A[1,1] = _b[hgc_r]
matrix A[1,2] = _se[hgc_r]
predict e_ols, resid

// IV
ivregress 2sls logwage (hgc_r=`iv') `ctrl' [pw=wgt], vce(cluster cty_grp)
matrix A[2,1] = _b[hgc_r]
matrix A[2,2] = _se[hgc_r]
predict e_iv, resid

// IV-OLS difference 
gen temp = e_iv*z_res/m_xz - e_ols*x_res/m_xx
reg temp [pw=wgt], vce(cluster cty_grp)
drop temp
matrix A[3,1] = A[2,1]-A[1,1]
matrix A[3,2] = _se[_cons] 

// covariate weight difference
ivregress 2sls mhat1 (hgc_r=`iv') `ctrl' [pw=wgt], vce(cluster cty_grp)
scalar b_c = _b[hgc_r]
predict e_c, resid
gen temp = ( e_c*z_res + vhat1*zhat1 )/m_xz - e_ols*x_res/m_xx
reg temp [pw=wgt], vce(cluster cty_grp)
drop temp
matrix A[4,1] = b_c - A[1,1]
matrix A[4,2] = _se[_cons]  

// treatment-level weight difference
ivregress 2sls mhat2 (hgc_r=`iv') `ctrl' [pw=wgt], vce(cluster cty_grp)
scalar b_ct = _b[hgc_r]
predict e_ct, resid
gen temp = ( (e_ct*z_res + vhat2*zhat2) - (e_c*z_res + vhat1*zhat1) )/m_xz
reg temp [pw=wgt], vce(cluster cty_grp)
drop temp
matrix A[5,1] = b_ct - b_c
matrix A[5,2] = _se[_cons]  

// marginal effect difference
gen temp = ( e_iv*z_res - (e_ct*z_res + vhat2*zhat2) )/m_xz
reg temp [pw=wgt], vce(cluster cty_grp)
drop temp
matrix A[6,1] = A[2,1] - b_ct
matrix A[6,2] = _se[_cons]  

clear
svmat A, names(col)
gen stat = ""
replace stat = "OLS" in 1
replace stat = "IV" in 2
replace stat = "IV-OLS" in 3
replace stat = "D_CW" in 4
replace stat = "D_TW" in 5
replace stat = "D_ME" in 6
order stat, first
export delimited using ../result/decom_base.csv, replace
